from openai import OpenAI

# Shared client used by every request in this file. It targets a server on the
# loopback interface rather than the default OpenAI endpoint.
# NOTE(review): the API key looks like a placeholder for a local server - confirm.
client = OpenAI(api_key="token-abc123", base_url="http://127.0.0.1:18804/v1")


# Sample prompt sent when the file is run as a script (Chinese for
# "Introduce Beijing"). The leading and trailing newlines are part of the value.
question = "\n介绍一下北京\n"

def chat_completion_test(prompt: str, stream: bool = True) -> None:
    """Send ``prompt`` to the chat-completions endpoint and print the reply.

    The request goes through the module-level ``client`` and consists of a
    fixed system message followed by ``prompt`` as the user message.

    Args:
        prompt: Text sent as the user message.
        stream: When true, request a streamed response and print each text
            fragment as soon as it arrives; otherwise print the complete
            reply in one go.

    Returns:
        None. The reply is written to standard output.
    """
    messages = [
        {"role": "system", "content": "你是一个专业的AI助手"},
        {"role": "user", "content": prompt},
    ]

    response = client.chat.completions.create(
        model="Qwen2.5-7B-Instruct-GPTQ-Int8",
        messages=messages,
        max_tokens=4096,
        stream=stream,
        temperature=0.7,
    )

    if not stream:
        print(response.choices[0].message.content)
        return

    for chunk in response:
        # The streaming API permits chunks whose `choices` list is empty
        # (e.g. a usage-only chunk); indexing [0] on those would raise
        # IndexError, so skip them.
        if not chunk.choices:
            continue
        # Deltas without text (None or "") carry nothing to display.
        if chunk_content := chunk.choices[0].delta.content:
            # Flush per fragment so the text appears incrementally.
            print(chunk_content, end="", flush=True)

    # Fragments are printed without line endings; finish the line so the
    # output ends with a newline just like the non-streaming branch.
    print()
      
if __name__ == "__main__":
    # Script entry point: ask the sample question and stream the answer.
    chat_completion_test(prompt=question, stream=True)

